home *** CD-ROM | disk | FTP | other *** search
Text File | 1994-07-07 | 4.5 KB | 170 lines | [TEXT/MPS ] |
- (* A generic lexer *)
-
- (**) #open "float";;
- (**) #open "int";;
- (**) #open "ref";;
- (**) #open "exc";;
- (**) #open "list";;
- (**) #open "fchar";;
- (**) #open "fstring";;
- (**) #open "stream";;
-
- (* The string buffering machinery *)
-
- let initial_buffer = create_string 32;;
-
- let buffer = ref initial_buffer;;
- let bufpos = ref 0;;
-
- let reset_buffer () =
- buffer := initial_buffer;
- bufpos := 0
- ;;
-
- let store c =
- if !bufpos >= string_length !buffer then begin
- let newbuffer = create_string (2 * !bufpos) in
- blit_string !buffer 0 newbuffer 0 !bufpos;
- buffer := newbuffer
- end;
- set_nth_char !buffer !bufpos c;
- incr bufpos
- ;;
-
- let get_string () =
- let s = sub_string !buffer 0 !bufpos in buffer := initial_buffer; s
- ;;
-
- #ifdef unix
- #define ACCENTED `\192`..`\255`
- #endif
- #ifdef macintosh
- #define ACCENTED `\128`..`\160`|`\174`|`\175`|`\190`|`\191`|`\203`|`\207`|\
- `\216`|`\217`|`\222`|`\223`|`\229`..`\239`|`\241`..`\244`
- #endif
- #ifdef msdos
- #define ACCENTED `\128`..`\154`|`\160`..`\165`
- #endif
-
- (* The lexer *)
-
- let make_lexer keywords =
-
- let kwd_table = hashtbl__new 17 in
- do_list (fun s -> hashtbl__add kwd_table s (Kwd s)) keywords;
-
- let ident_or_keyword id =
- try hashtbl__find kwd_table id with Not_found -> Ident id
- and keyword_or_error c =
- let s = make_string 1 c in
- try hashtbl__find kwd_table s
- with Not_found -> raise Parse_error in
-
- let rec next_token = function
- [< '(*'*) ` `|`\010`|`\013`|`\009`|`\026`|`\012`; s >] ->
- next_token s
- | [< '(*'*) `A`..`Z`|`a`..`z`|ACCENTED as c; s>] ->
- reset_buffer(); store c; ident s
- | [< '(*'*) `!`|`%`|`&`|`$`|`#`|`+`|`/`|`:`|`<`|`=`|`>`|`?`|`@`|`\\`|
- `~`|`^`|`|`|`*` as c; s >] ->
- reset_buffer(); store c; ident2 s
- | [< '(*'*) `0`..`9` as c; s>] ->
- reset_buffer(); store c; number s
- | [< '(*'*) `\``; char c; '(*'*) `\`` >] ->
- Char c
- | [< ' `"` (*'*); s >] ->
- reset_buffer(); String(string s)
- | [< '(*'*) `-`; s >] ->
- neg_number s
- | [< '(*'*) `(`; s >] ->
- maybe_comment s
- | [< '(*'*) c >] ->
- keyword_or_error c
-
- and ident = function
- [< '(*'*) `A`..`Z`|`a`..`z`|ACCENTED|`0`..`9`|`_`|`'` (*'*) as c; s>] ->
- store c; ident s
- | [< >] ->
- ident_or_keyword(get_string())
-
- and ident2 = function
- [< '(*'*) `!`|`%`|`&`|`$`|`#`|`+`|`-`|`/`|`:`|`<`|`=`|`>`|`?`|`@`|`\\`|
- `~`|`^`|`|`|`*` as c; s >] ->
- store c; ident2 s
- | [< >] ->
- ident_or_keyword(get_string())
-
- and neg_number = function
- [< '(*'*) `0`..`9` as c; s >] ->
- reset_buffer(); store `-`; store c; number s
- | [< s >] ->
- reset_buffer(); store `-`; ident2 s
-
- and number = function
- [< '(*'*) `0`..`9` as c; s >] ->
- store c; number s
- | [< '(*'*) `.`; s >] ->
- store `.`; decimal_part s
- | [< '(*'*) `e`|`E`; s >] ->
- store `E`; exponent_part s
- | [< >] ->
- Int(int_of_string(get_string()))
-
- and decimal_part = function
- [< '(*'*) `0`..`9` as c; s >] ->
- store c; decimal_part s
- | [< '(*'*) `e`|`E`; s >] ->
- store `E`; exponent_part s
- | [< >] ->
- Float(float_of_string(get_string()))
-
- and exponent_part = function
- [< '(*'*) `+`|`-` as c; s >] ->
- store c; end_exponent_part s
- | [< s >] ->
- end_exponent_part s
-
- and end_exponent_part = function
- [< '(*'*) `0`..`9` as c; s >] ->
- store c; end_exponent_part s
- | [< >] ->
- Float(float_of_string(get_string()))
-
- and string = function
- [< ' `"` (*'*) >] -> get_string()
- | [< '(*'*) `\\`; escape c; s >] -> store c; string s
- | [< '(*'*) c; s >] -> store c; string s
-
- and char = function
- [< '(*'*) `\\`; escape c >] -> c
- | [< '(*'*) c >] -> c
-
- and escape = function
- [< '(*'*) `n` >] -> `\n`
- | [< '(*'*) `r` >] -> `\r`
- | [< '(*'*) `t` >] -> `\t`
- | [< '(*'*) `0`..`9` as c1; '(*'*) `0`..`9` as c2; '(*'*) `0`..`9` as c3 >] ->
- char_of_int((int_of_char c1 - 48) * 100 +
- (int_of_char c2 - 48) * 10 + (int_of_char c3))
- | [< '(*'*) c >] -> c
-
- and maybe_comment = function
- [< '(*'*) `*`; s >] -> comment s; next_token s
- | [< >] -> keyword_or_error `(`
-
- and comment = function
- [< '(*'*) `(`; s >] -> maybe_nested_comment s
- | [< '(*'*) `*`; s >] -> maybe_end_comment s
- | [< '(*'*) c; s >] -> comment s
-
- and maybe_nested_comment = function
- [< '(*'*) `*`; s >] -> comment s; comment s
- | [< '(*'*) c; s >] -> comment s
-
- and maybe_end_comment = function
- [< '(*'*) `)` >] -> ()
- | [< '(*'*) c; s >] -> comment s
-
- in fun input -> stream_from (fun () -> next_token input)
- ;;
-